#!/usr/bin/env python3
"""
剔除N-RDD2024数据集中的D90标签
"""

import os
from pathlib import Path

def _strip_d90_from_file(label_file):
    """Rewrite one label file without its D90 (class ``9``) lines.

    A line is a D90 annotation when its first whitespace-separated token is
    exactly ``"9"``. The file is only rewritten when at least one such line
    was found; in that case the kept lines are written back stripped of
    surrounding whitespace, one per line, and blank lines are dropped.

    Args:
        label_file: Path of the UTF-8 encoded label file to filter.

    Returns:
        The number of D90 lines removed (``0`` means the file was not touched).

    Raises:
        OSError: If the file cannot be read or written.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(label_file, 'r', encoding='utf-8') as f:
        stripped_lines = [raw.strip() for raw in f]

    kept_lines = []
    removed = 0
    for line in stripped_lines:
        if not line:
            continue
        if line.split()[0] == '9':  # D90 class id
            removed += 1
        else:
            kept_lines.append(line)

    if removed:
        with open(label_file, 'w', encoding='utf-8') as f:
            f.writelines(line + '\n' for line in kept_lines)

    return removed


def remove_d90_labels(base_path="N-RDD2024/Training and Validation Dataset"):
    """Remove every D90 (class ``9``) annotation from the dataset label files.

    Walks each ``*_txt`` directory directly under ``base_path``, looks at the
    ``train/labels`` and ``valid/labels`` sub-directories, and filters every
    ``*.txt`` file found there in place. Progress and a final summary are
    printed to standard output. A file that cannot be read, decoded or written
    is reported and skipped; it is not counted as modified.

    Args:
        base_path: Root directory containing the per-country ``*_txt``
            directories (``str`` or ``Path``). Defaults to the path the
            script originally hard-coded, relative to the working directory.

    Raises:
        OSError: If ``base_path`` itself cannot be listed (for example, it
            does not exist).
    """
    base_path = Path(base_path)
    suffix = '_txt'

    total_files = 0
    modified_files = 0
    removed_labels = 0

    print("开始剔除D90标签...")

    for country_dir in base_path.iterdir():
        if not country_dir.is_dir() or not country_dir.name.endswith(suffix):
            continue

        # Drop only the trailing marker; str.replace would also eat any
        # earlier "_txt" occurrence inside the directory name.
        country_name = country_dir.name[:-len(suffix)]
        print(f"\n处理 {country_name} 数据集...")

        for subset in ('train', 'valid'):
            labels_dir = country_dir / subset / "labels"
            if not labels_dir.exists():
                continue

            print(f"  处理 {subset} 集...")

            for label_file in labels_dir.glob("*.txt"):
                total_files += 1

                try:
                    removed = _strip_d90_from_file(label_file)
                except (OSError, UnicodeDecodeError) as e:
                    print(f"    错误处理文件 {label_file}: {e}")
                    continue

                # Counters are updated only after the rewrite succeeded, so
                # the summary never reports work that did not reach the disk.
                if removed:
                    modified_files += 1
                    removed_labels += removed
                    print(f"    修改文件: {label_file.name}")

    print("\n剔除完成!")
    print(f"总共检查了 {total_files} 个标签文件")
    print(f"修改了 {modified_files} 个文件")
    print(f"剔除了 {removed_labels} 个D90标签")

# Run the in-place label clean-up only when this file is executed as a
# script, so importing the module does not touch any label files.
if __name__ == "__main__":
    remove_d90_labels()